/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.fr;

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;

/**
 * Test case for FrenchAnalyzer.
 *
 * <p>Every analyzer is opened in a try-with-resources statement so that it is closed even when an
 * assertion fails part-way through a test.
 */
public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {

  /** Runs a range of inputs through a default FrenchAnalyzer and checks the emitted terms. */
  public void testAnalyzer() throws Exception {
    try (FrenchAnalyzer fa = new FrenchAnalyzer()) {
      assertAnalyzesTo(fa, "", new String[] {});

      assertAnalyzesTo(fa, "chien chat cheval", new String[] {"chien", "chat", "cheval"});

      assertAnalyzesTo(fa, "chien CHAT CHEVAL", new String[] {"chien", "chat", "cheval"});

      assertAnalyzesTo(
          fa, "  chien  ,? + = -  CHAT /: > CHEVAL", new String[] {"chien", "chat", "cheval"});

      assertAnalyzesTo(fa, "chien++", new String[] {"chien"});

      assertAnalyzesTo(fa, "mot \"entreguillemet\"", new String[] {"mot", "entreguilemet"});

      // French-specific cases follow.
      // 1. Hyphenated compound: one might expect this to stay a single term, since French often
      // uses the hyphen to compose words, but the expected output is two terms.
      assertAnalyzesTo(fa, "Jean-François", new String[] {"jean", "francoi"});

      // 2. stopwords
      assertAnalyzesTo(
          fa,
          "le la chien les aux chat du des à cheval",
          new String[] {"chien", "chat", "cheval"});

      // some nouns and adjectives
      assertAnalyzesTo(
          fa,
          "lances chismes habitable chiste éléments captifs",
          new String[] {"lanc", "chism", "habitabl", "chist", "element", "captif"});

      // some verbs
      assertAnalyzesTo(
          fa,
          "finissions souffrirent rugissante",
          new String[] {"finision", "soufrirent", "rugisant"});

      // some everything else
      // aujourd'hui stays one term, which is OK
      assertAnalyzesTo(
          fa,
          "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
          new String[] {"c3po", "aujourd'hui", "oeuf", "ïaöuaä", "anticonstitutionel", "java"});

      // some more everything else
      // both 1940-1945 and 1940:1945 are expected to be split into two terms each
      assertAnalyzesTo(
          fa,
          "33Bis 1940-1945 1940:1945 (---i+++)*",
          new String[] {"33bi", "1940", "1945", "1940", "1945", "i"});
    }
  }

  /** Analyzes two inputs in a row with the same analyzer instance. */
  public void testReusableTokenStream() throws Exception {
    try (FrenchAnalyzer fa = new FrenchAnalyzer()) {
      // stopwords
      assertAnalyzesTo(
          fa,
          "le la chien les aux chat du des à cheval",
          new String[] {"chien", "chat", "cheval"});

      // some nouns and adjectives
      assertAnalyzesTo(
          fa,
          "lances chismes habitable chiste éléments captifs",
          new String[] {"lanc", "chism", "habitabl", "chist", "element", "captif"});
    }
  }

  /**
   * Checks that a word in the set passed as the second constructor argument is emitted unchanged
   * ("habitable"), while a word outside that set is still reduced ("chiste" to "chist").
   */
  public void testExclusionTableViaCtor() throws Exception {
    CharArraySet set = new CharArraySet(1, true);
    set.add("habitable");

    try (FrenchAnalyzer fa = new FrenchAnalyzer(CharArraySet.EMPTY_SET, set)) {
      assertAnalyzesTo(fa, "habitable chiste", new String[] {"habitable", "chist"});
    }

    // A second analyzer built from the same set must yield the same result.
    try (FrenchAnalyzer fa = new FrenchAnalyzer(CharArraySet.EMPTY_SET, set)) {
      assertAnalyzesTo(fa, "habitable chiste", new String[] {"habitable", "chist"});
    }
  }

  /** Checks that the elided article in "l'embrouille" does not appear in the emitted term. */
  public void testElision() throws Exception {
    try (FrenchAnalyzer fa = new FrenchAnalyzer()) {
      assertAnalyzesTo(fa, "voir l'embrouille", new String[] {"voir", "embrouil"});
    }
  }

  /** Test that stopwords are not case sensitive */
  public void testStopwordsCasing() throws IOException {
    try (FrenchAnalyzer a = new FrenchAnalyzer()) {
      assertAnalyzesTo(a, "Votre", new String[] {});
    }
  }

  /** blast some random strings through the analyzer */
  public void testRandomStrings() throws Exception {
    try (Analyzer a = new FrenchAnalyzer()) {
      checkRandomData(random(), a, 200 * RANDOM_MULTIPLIER);
    }
  }

  /** test accent-insensitive */
  public void testAccentInsensitive() throws Exception {
    try (Analyzer a = new FrenchAnalyzer()) {
      // The accented and unaccented spellings must produce the same single term.
      checkOneTerm(a, "sécuritaires", "securitair");
      checkOneTerm(a, "securitaires", "securitair");
    }
  }
}
